# importing libraries
from __future__ import print_function
from ipywidgets import interact, interactive, fixed, interact_manual
from IPython.core.display import display, HTML
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import plotly.express as px
import folium
import plotly.graph_objects as go
import seaborn as sns
import ipywidgets as widgets
# Load every table directly from the CSSEGISandData/COVID-19 GitHub repository.
_COVID_REPO = 'https://raw.githubusercontent.com/CSSEGISandData/COVID-19'
_TIME_SERIES_DIR = _COVID_REPO + '/master/csse_covid_19_data/csse_covid_19_time_series'

# Global time series: one row per place, one column per date.
death_df = pd.read_csv(_TIME_SERIES_DIR + '/time_series_covid19_deaths_global.csv')
confirmed_df = pd.read_csv(_TIME_SERIES_DIR + '/time_series_covid19_confirmed_global.csv')
recovered_df = pd.read_csv(_TIME_SERIES_DIR + '/time_series_covid19_recovered_global.csv')

# Per-country summary table (lives on the repository's web-data branch).
country_df = pd.read_csv(_COVID_REPO + '/web-data/data/cases_country.csv')

# Preview the first rows of the confirmed time series.
confirmed_df.head()
| Province/State | Country/Region | Lat | Long | 1/22/20 | 1/23/20 | 1/24/20 | 1/25/20 | 1/26/20 | 1/27/20 | ... | 11/15/20 | 11/16/20 | 11/17/20 | 11/18/20 | 11/19/20 | 11/20/20 | 11/21/20 | 11/22/20 | 11/23/20 | 11/24/20 | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | NaN | Afghanistan | 33.93911 | 67.709953 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 43240 | 43403 | 43628 | 43851 | 44228 | 44443 | 44503 | 44706 | 44988 | 45280 |
| 1 | NaN | Albania | 41.15330 | 20.168300 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 27830 | 28432 | 29126 | 29837 | 30623 | 31459 | 32196 | 32761 | 33556 | 34300 |
| 2 | NaN | Algeria | 28.03390 | 1.659600 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 67679 | 68589 | 69591 | 70629 | 71652 | 72755 | 73774 | 74862 | 75867 | 77000 |
| 3 | NaN | Andorra | 42.50630 | 1.521800 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 5872 | 5914 | 5951 | 6018 | 6066 | 6142 | 6207 | 6256 | 6304 | 6351 |
| 4 | NaN | Angola | -11.20270 | 17.873900 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 13451 | 13615 | 13818 | 13922 | 14134 | 14267 | 14413 | 14493 | 14634 | 14742 |
5 rows × 312 columns
# Preview the first rows of the recovered time series.
recovered_df.head()
| Province/State | Country/Region | Lat | Long | 1/22/20 | 1/23/20 | 1/24/20 | 1/25/20 | 1/26/20 | 1/27/20 | ... | 11/15/20 | 11/16/20 | 11/17/20 | 11/18/20 | 11/19/20 | 11/20/20 | 11/21/20 | 11/22/20 | 11/23/20 | 11/24/20 | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | NaN | Afghanistan | 33.93911 | 67.709953 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 35092 | 35137 | 35160 | 35295 | 35350 | 35370 | 35422 | 35934 | 35976 | 36122 |
| 1 | NaN | Albania | 41.15330 | 20.168300 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 12889 | 13453 | 13804 | 14216 | 14565 | 15055 | 15469 | 15842 | 16230 | 16666 |
| 2 | NaN | Algeria | 28.03390 | 1.659600 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 44633 | 45148 | 45148 | 46326 | 46962 | 47581 | 48183 | 48794 | 49421 | 50070 |
| 3 | NaN | Andorra | 42.50630 | 1.521800 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 4747 | 4830 | 4965 | 5055 | 5127 | 5239 | 5290 | 5358 | 5405 | 5503 |
| 4 | NaN | Angola | -11.20270 | 17.873900 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 6444 | 6523 | 6582 | 6623 | 7062 | 7117 | 7273 | 7346 | 7351 | 7444 |
5 rows × 312 columns
# Preview the first rows of the deaths time series.
death_df.head()
| Province/State | Country/Region | Lat | Long | 1/22/20 | 1/23/20 | 1/24/20 | 1/25/20 | 1/26/20 | 1/27/20 | ... | 11/15/20 | 11/16/20 | 11/17/20 | 11/18/20 | 11/19/20 | 11/20/20 | 11/21/20 | 11/22/20 | 11/23/20 | 11/24/20 | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | NaN | Afghanistan | 33.93911 | 67.709953 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 1617 | 1626 | 1638 | 1645 | 1650 | 1661 | 1675 | 1687 | 1695 | 1712 |
| 1 | NaN | Albania | 41.15330 | 20.168300 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 623 | 631 | 637 | 646 | 657 | 672 | 685 | 699 | 716 | 735 |
| 2 | NaN | Algeria | 28.03390 | 1.659600 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 2154 | 2168 | 2186 | 2206 | 2224 | 2236 | 2255 | 2272 | 2294 | 2309 |
| 3 | NaN | Andorra | 42.50630 | 1.521800 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 76 | 76 | 76 | 76 | 76 | 76 | 76 | 76 | 76 | 76 |
| 4 | NaN | Angola | -11.20270 | 17.873900 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 322 | 324 | 328 | 332 | 333 | 334 | 336 | 337 | 337 | 338 |
5 rows × 312 columns
# Preview the first rows of the per-country summary table.
country_df.head()
| Country_Region | Last_Update | Lat | Long_ | Confirmed | Deaths | Recovered | Active | Incident_Rate | People_Tested | People_Hospitalized | Mortality_Rate | UID | ISO3 | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | Afghanistan | 2020-11-25 12:26:29 | 33.93911 | 67.709953 | 45490.0 | 1725.0 | 36145.0 | 7620.0 | 116.855738 | NaN | NaN | 3.792042 | 4 | AFG |
| 1 | Albania | 2020-11-25 12:26:29 | 41.15330 | 20.168300 | 34300.0 | 735.0 | 16666.0 | 16899.0 | 1191.882688 | NaN | NaN | 2.142857 | 8 | ALB |
| 2 | Algeria | 2020-11-25 12:26:29 | 28.03390 | 1.659600 | 77000.0 | 2309.0 | 50070.0 | 24621.0 | 175.594455 | NaN | NaN | 2.998701 | 12 | DZA |
| 3 | Andorra | 2020-11-25 12:26:29 | 42.50630 | 1.521800 | 6351.0 | 76.0 | 5503.0 | 772.0 | 8219.763153 | NaN | NaN | 1.196662 | 20 | AND |
| 4 | Angola | 2020-11-25 12:26:29 | -11.20270 | 17.873900 | 14742.0 | 338.0 | 7444.0 | 6960.0 | 44.854499 | NaN | NaN | 2.292769 | 24 | AGO |
# data cleaning
# Lowercase every column name so all four tables can be addressed the same way.
# Index.str.lower() returns a real Index instead of handing pandas a lazy map object.
country_df.columns = country_df.columns.str.lower()
confirmed_df.columns = confirmed_df.columns.str.lower()
death_df.columns = death_df.columns.str.lower()
recovered_df.columns = recovered_df.columns.str.lower()

# Shorten 'province/state' to 'state' and 'country/region' to 'country'.
_TIME_SERIES_RENAMES = {'province/state': 'state', 'country/region': 'country'}
confirmed_df = confirmed_df.rename(columns=_TIME_SERIES_RENAMES)
# Must start from recovered_df itself: renaming confirmed_df here would
# overwrite the recovered data with a copy of the confirmed counts.
recovered_df = recovered_df.rename(columns=_TIME_SERIES_RENAMES)
death_df = death_df.rename(columns=_TIME_SERIES_RENAMES)

# The summary table names its country column differently.
country_df = country_df.rename(columns={'country_region': 'country'})
# country_df.head()
# Worldwide totals: sum the matching column of the per-country table.
confirmed_total, deaths_total, recovered_total, active_total = (
    int(country_df[column].sum())
    for column in ('confirmed', 'deaths', 'recovered', 'active')
)

# Headline banner: one coloured <span> per statistic inside a dark <div>.
# active_total is computed above but is not part of the banner.
headline_stats = [
    ("color: #FFE5D9; font-size:30px;", "Confirmed", confirmed_total),
    ("color: #ED1C24; font-size:30px;margin-left:20px;", "Deaths", deaths_total),
    ("color: #D8E2DC; font-size:30px; margin-left:20px;", "Recovered", recovered_total),
]
headline_spans = "".join(
    "<span style='{}'> {}: {}</span>".format(css, label, value)
    for css, label, value in headline_stats
)
display(HTML(
    "<div style = 'background-color: #090C02; padding: 30px '>"
    + headline_spans
    + "</div>"
))
# sorting the values by confirmed cases in descending order
# country_df.sort_values('confirmed', ascending= False).head(10).style.background_gradient(cmap='copper')
# Empty plotly FigureWidget with a default layout. It is never populated in the
# visible code; it is only placed inside the hidden widgets.VBox containers
# that follow each interact(...) call.
fig = go.FigureWidget( layout=go.Layout() )
def highlight_col(x):
    """Return a grid of CSS strings that colours three columns of ``x``.

    The result has the same index and column labels as ``x``. Cells in the
    columns at positions 4, 5 and 6 get a purple, red and grey background
    respectively; every other cell is the empty string. Intended for
    ``DataFrame.style.apply(..., axis=None)``.
    """
    column_colours = ((4, 'purple'), (5, 'red'), (6, 'grey'))
    styles = pd.DataFrame('', index=x.index, columns=x.columns)
    for position, colour in column_colours:
        styles.iloc[:, position] = 'background-color: ' + colour
    return styles
def show_latest_cases(n):
    """Return a styled table of the ``n`` countries with the most confirmed cases.

    ``n`` is converted with ``int()`` first, so a numeric string is accepted.
    The rows come from ``country_df`` and are coloured by ``highlight_col``.
    """
    row_count = int(n)
    ranked = country_df.sort_values('confirmed', ascending=False)
    top_rows = ranked.head(row_count)
    return top_rows.style.apply(highlight_col, axis=None)
# Interactive table: n starts as the string '10' and show_latest_cases
# converts whatever is entered with int().
interact(show_latest_cases, n='10')
# Container layout with a green border ...
ipywLayout = widgets.Layout(border='solid 2px green')
ipywLayout.display='none' # ... but display='none' keeps the container (and the figure in it) from being shown
# Wrap the module-level FigureWidget `fig` in the hidden container.
widgets.VBox([fig], layout=ipywLayout)
| country | last_update | lat | long_ | confirmed | deaths | recovered | active | incident_rate | people_tested | people_hospitalized | mortality_rate | uid | iso3 | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 176 | US | 2020-11-25 12:26:29 | 40.000000 | -100.000000 | 12598660.000000 | 259976.000000 | 4696664.000000 | 7642011.000000 | 3823.960341 | nan | nan | 2.063521 | 840 | USA |
| 79 | India | 2020-11-25 12:26:29 | 20.593684 | 78.962880 | 9222216.000000 | 134699.000000 | 8642771.000000 | 444746.000000 | 668.274398 | nan | nan | 1.460593 | 356 | IND |
| 23 | Brazil | 2020-11-25 12:26:29 | -14.235000 | -51.925300 | 6118708.000000 | 170115.000000 | 5509645.000000 | 438948.000000 | 2878.587228 | nan | nan | 2.780244 | 76 | BRA |
| 62 | France | 2020-11-25 12:26:29 | 46.227600 | 2.213700 | 2206126.000000 | 50327.000000 | 160257.000000 | 1994750.000000 | 3379.818141 | nan | nan | 2.281239 | 250 | FRA |
| 141 | Russia | 2020-11-25 12:26:29 | 61.524000 | 105.318800 | 2144229.000000 | 37173.000000 | 1646737.000000 | 460319.000000 | 1469.309579 | nan | nan | 1.733630 | 643 | RUS |
| 160 | Spain | 2020-11-25 12:26:29 | 40.463667 | -3.749220 | 1594844.000000 | 43668.000000 | 150376.000000 | 1400800.000000 | 3411.082028 | nan | nan | 2.738073 | 724 | ESP |
| 180 | United Kingdom | 2020-11-25 12:26:29 | 55.000000 | -3.000000 | 1542623.000000 | 55935.000000 | 3410.000000 | 1483278.000000 | 2272.372668 | nan | nan | 3.625967 | 826 | GBR |
| 85 | Italy | 2020-11-25 12:26:29 | 41.871900 | 12.567400 | 1455022.000000 | 51306.000000 | 605330.000000 | 798386.000000 | 2406.513412 | nan | nan | 3.526132 | 380 | ITA |
| 6 | Argentina | 2020-11-25 12:26:29 | -38.416100 | -63.616700 | 1381795.000000 | 37432.000000 | 1210634.000000 | 133729.000000 | 3057.354230 | nan | nan | 2.708940 | 32 | ARG |
| 37 | Colombia | 2020-11-25 12:26:29 | 4.570900 | -74.297300 | 1262494.000000 | 35677.000000 | 1167857.000000 | 58960.000000 | 2481.176185 | nan | nan | 2.825914 | 170 | COL |
# Per-country table ordered from most to fewest confirmed cases.
sorted_country_df = country_df.sort_values(by='confirmed', ascending=False)
# plotting the n worst hit countries as a bubble chart
def bubble_chart(n):
    """Show a bubble chart of the ``n`` countries with the most confirmed cases.

    Rows are taken from the top of ``sorted_country_df``. Both the vertical
    position and the bubble size encode the confirmed count, and each country
    gets its own colour. The figure is displayed; nothing is returned.
    """
    worst_hit = sorted_country_df.head(n)
    chart = px.scatter(
        worst_hit,
        x="country",
        y="confirmed",
        size="confirmed",
        color="country",
        hover_name="country",
        size_max=60,
    )
    chart.update_layout(
        title=str(n) + " Worst hit countries",
        xaxis_title="Countries",
        yaxis_title="Confirmed Cases",
        width=700,
    )
    chart.show()
# Interactive control for bubble_chart; n starts at 10.
interact(bubble_chart, n=10)
# Container layout with a green border ...
ipywLayout = widgets.Layout(border='solid 2px green')
ipywLayout.display='none' # ... but display='none' keeps the container from being shown
# `fig` here is the module-level FigureWidget, not the figure that
# bubble_chart builds in its own local variable.
widgets.VBox([fig], layout=ipywLayout)
def plot_cases_of_a_country(country):
    """Plot confirmed cases and deaths over time for one country or the world.

    Parameters
    ----------
    country : str
        Compared for exact equality with the ``country`` column of
        ``confirmed_df`` / ``death_df``; all matching rows are summed.
        ``'World'`` in any letter case sums every row instead.

    The figure is displayed; nothing is returned.
    """
    # Column position where the plotted window starts. Both axes are cut from
    # this same position so every total is drawn against its own date column.
    first_plotted_col = 20
    labels = ['confirmed', 'deaths']
    colors = ['blue', 'red']
    line_size = [4, 5]
    df_list = [confirmed_df, death_df]

    whole_world = str(country).lower() == 'world'

    fig = go.Figure()
    for label, color, width, df in zip(labels, colors, line_size, df_list):
        rows = df if whole_world else df[df['country'] == country]
        window = rows.iloc[:, first_plotted_col:]
        x_data = np.array(list(window.columns))
        # Column-wise sum over the selected rows. Taking x and y from one
        # slice keeps their lengths equal; summing from column 4 while
        # labelling from column 20 shifted the world curve onto wrong dates.
        y_data = np.sum(np.asarray(window), axis=0)
        fig.add_trace(go.Scatter(
            x=x_data,
            y=y_data,
            mode='lines+markers',
            name=label,
            line=dict(color=color, width=width),
            connectgaps=True,
            text="Total " + str(label) + ": " + str(y_data[-1]),
        ))

    fig.update_layout(
        title="COVID 19 cases of " + country,
        xaxis_title='Date',
        yaxis_title='No. of Confirmed Cases',
        margin=dict(l=20, r=20, t=40, b=20),
        paper_bgcolor="lightgrey",
        width=800,
    )
    fig.update_yaxes(type="linear")
    fig.show()
# Interactive control for plot_cases_of_a_country; the input starts at 'World'.
interact(plot_cases_of_a_country, country='World')
# Container layout with a green border ...
ipywLayout = widgets.Layout(border='solid 2px green')
ipywLayout.display='none' # ... but display='none' keeps the container (and the figure in it) from being shown
# `fig` here is the module-level FigureWidget, not the figure that
# plot_cases_of_a_country builds in its own local variable.
widgets.VBox([fig], layout=ipywLayout)
# Bar charts for the ten countries with the most confirmed cases, one chart
# per metric. Every chart uses the same rows, title, colour and size; only
# the column drawn on the y axis differs.
top_ten = sorted_country_df.head(10)
bar_options = dict(
    x="country",
    title="Top 10 worst affected countries",  # chart title
    color_discrete_sequence=["pink"],
    height=500,
    width=800,
)
px.bar(top_ten, y="confirmed", **bar_options)
px.bar(top_ten, y="deaths", **bar_options)
px.bar(top_ten, y="recovered", **bar_options)
import math
# World map with one circle per row of the confirmed time series.
world_map = folium.Map(location=[11, 0], tiles="cartodbpositron",
                       zoom_start=2, max_zoom=6, min_zoom=2)

# The last column of each time series holds the most recent counts.
# Rows of death_df are paired with rows of confirmed_df purely by position.
# NOTE(review): assumes both tables list the same places in the same order - confirm.
map_rows = zip(
    confirmed_df['lat'],
    confirmed_df['long'],
    confirmed_df['country'],
    confirmed_df.iloc[:, -1],
    death_df.iloc[:, -1],
)
for lat, lon, country, confirmed, deaths in map_rows:
    # Skip rows without usable coordinates; a NaN fails both range tests.
    if not (-90 <= lat <= 90 and -180 <= lon <= 180):
        continue

    # Guard the division instead of padding the denominator, so small counts
    # are not skewed (1 death out of 1 case is 100%, not ~50%).
    death_rate = np.round(deaths / confirmed * 100, 2) if confirmed > 0 else 0.0

    tooltip_html = "".join([
        "<div style='margin: 0; background-color: black; color: white;'>",
        "<h4 style='text-align:center;font-weight: bold'>", country, "</h4>",
        "<hr style='margin:10px;color: white;'>",
        "<ul style='color: white;;list-style-type:circle;align-item:left;padding-left:20px;padding-right:20px'>",
        "<li>Confirmed: ", str(confirmed), "</li>",
        "<li>Deaths: ", str(deaths), "</li>",
        "<li>Death Rate: ", str(death_rate), "</li>",
        "</ul></div>",
    ])

    folium.Circle(
        location=[lat, lon],
        fill=True,
        # Radius grows with the integer part of log(confirmed); the small
        # offset keeps the logarithm defined when the count is zero.
        radius=(int(np.log(confirmed + 1.00001)) + 0.2) * 50000,
        color='red',
        fill_color='indigo',
        tooltip=tooltip_html,
    ).add_to(world_map)

# Trailing expression so a notebook cell renders the map.
world_map